package com.chatplus.application.service.basedata.impl;

import com.baomidou.mybatisplus.core.conditions.query.LambdaQueryWrapper;
import com.baomidou.mybatisplus.extension.service.impl.ServiceImpl;
import com.chatplus.application.common.domain.response.SensitiveWordFilterResultResponse;
import com.chatplus.application.common.util.ResourcesUtils;
import com.chatplus.application.dao.basedata.SensitiveWordDao;
import com.chatplus.application.domain.entity.basedata.SensitiveWordEntity;
import com.chatplus.application.service.basedata.SensitiveWordService;
import com.huaban.analysis.jieba.JiebaSegmenter;
import com.huaban.analysis.jieba.WordDictionary;
import com.chatplus.application.common.logging.SouthernQuietLogger;
import com.chatplus.application.common.logging.SouthernQuietLoggerFactory;
import net.bytebuddy.utility.RandomString;
import org.apache.commons.collections4.CollectionUtils;
import org.apache.commons.lang3.StringUtils;
import org.springframework.stereotype.Service;

import java.io.File;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.nio.file.StandardOpenOption;
import java.time.Duration;
import java.time.Instant;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import java.util.stream.Collectors;

/**
 * Sensitive-word service implementation.
 *
 * <p>Keeps an in-memory dictionary of sensitive words (backed by table
 * {@code t_sensitive_word}) and uses the jieba segmenter to tokenize input
 * before matching, so multi-character words are detected as whole tokens.</p>
 *
 * <p>Table: t_sensitive_word - 敏感词表</p>
 *
 * @author developer
 * @see SensitiveWordEntity
 */
@Service
public class SensitiveWordServiceImpl extends ServiceImpl<SensitiveWordDao, SensitiveWordEntity> implements SensitiveWordService {

    private static final SouthernQuietLogger LOGGER = SouthernQuietLoggerFactory.getLogger(SensitiveWordServiceImpl.class);

    /**
     * Snapshot of sensitive words, keyed (and valued) by the lower-cased word.
     * Replaced wholesale on reload; declared {@code volatile} so threads calling
     * {@link #filter(String)} always observe the latest reference — the
     * {@link ConcurrentHashMap} alone does not make the field swap visible.
     */
    private volatile Map<String, String> dict = new ConcurrentHashMap<>();

    private final JiebaSegmenter segmenter = new JiebaSegmenter();

    /** Mask substituted for each detected sensitive word (one asterisk per word, not per character). */
    public static final String ASTERISK = "*";

    /**
     * Reloads the dictionary from all non-deleted rows of {@code t_sensitive_word}.
     */
    @Override
    public void reloadAllWords() {
        LambdaQueryWrapper<SensitiveWordEntity> queryWrapper = new LambdaQueryWrapper<>();
        queryWrapper.isNull(SensitiveWordEntity::getDeletedAt);

        List<String> words = list(queryWrapper)
                .stream()
                .filter(w -> w != null && StringUtils.isNotBlank(w.getWord()))
                .map(SensitiveWordEntity::getWord)
                .distinct()
                .collect(Collectors.toList());

        reloadWords(words);
    }

    /**
     * Atomically swaps the in-memory dictionary and rebuilds jieba's word
     * dictionary (base dictionary, optional bundled custom dictionary, then the
     * sensitive words themselves so each is segmented as a single token).
     *
     * <p>Words are lower-cased here because {@link #filter(String)} lower-cases
     * the input before segmentation; without normalization, words containing
     * uppercase letters could never match.</p>
     *
     * @param wordList sensitive words to load; {@code null} is treated as empty
     */
    private synchronized void reloadWords(List<String> wordList) {
        Instant begin = Instant.now();

        if (wordList == null) {
            wordList = Collections.emptyList();
        }

        Map<String, String> newDict = new ConcurrentHashMap<>();
        for (String word : wordList) {
            String normalized = word.toLowerCase();
            newDict.put(normalized, normalized);
        }
        this.dict = newDict;

        WordDictionary wordDict = WordDictionary.getInstance();
        wordDict.freqs.clear();
        wordDict.loadDict();

        // Load the bundled custom dictionary, if present on the classpath.
        String jiebaCustomWordsContent = ResourcesUtils.getResourceContentString("data/jieba_custom_words.dict");
        if (StringUtils.isNotBlank(jiebaCustomWordsContent)) {
            loadUserDict(wordDict, "jieba_custom_words",
                    Collections.singletonList(jiebaCustomWordsContent),
                    "结巴分词加载自定义词典异常");
        }

        // Register the sensitive words with a high frequency (200) so the
        // segmenter emits each one as a single token. Files.write(Iterable)
        // appends the platform line separator itself — do not add another.
        if (CollectionUtils.isNotEmpty(wordList)) {
            List<String> lineList = wordList.stream()
                    .map(word -> word.toLowerCase() + " 200")
                    .collect(Collectors.toList());
            loadUserDict(wordDict, "sensitive_words_", lineList, "结巴分词加载敏感词失败");
        }

        LOGGER.message("结巴分词加载敏感词完成")
                .context("wordListSize", wordList.size())
                .context("spend", Duration.between(begin, Instant.now()))
                .info();
    }

    /**
     * Writes the given dictionary lines to a freshly created, uniquely named
     * temp file, feeds it to jieba's user dictionary, then deletes the file
     * immediately (instead of {@code deleteOnExit()}, which would leak a file
     * per reload for the JVM lifetime). Failures are logged, never thrown.
     *
     * @param wordDict       jieba dictionary to load into
     * @param prefix         temp-file name prefix
     * @param lines          UTF-8 lines in jieba user-dict format ("word freq")
     * @param failureMessage log message used when writing/loading fails
     */
    private void loadUserDict(WordDictionary wordDict, String prefix, List<String> lines, String failureMessage) {
        Path path = null;
        try {
            path = Files.createTempFile(prefix, ".dict");
            Files.write(path, lines, StandardCharsets.UTF_8);
            wordDict.loadUserDict(path);
        } catch (Exception e) {
            LOGGER.message(failureMessage)
                    .context("path", path == null ? null : path.toString())
                    .exception(e)
                    .error();
        } finally {
            if (path != null) {
                try {
                    Files.deleteIfExists(path);
                } catch (IOException ignored) {
                    // Best effort — the OS temp-dir cleanup will catch leftovers.
                }
            }
        }
    }

    /**
     * Segments the input and replaces each token found in the dictionary with a
     * single {@link #ASTERISK}; all other characters are copied through from the
     * original (case-preserving) input.
     *
     * @param input text to filter; {@code null} yields an empty result
     * @return the filter result: original input, masked output, matched words
     */
    @Override
    public SensitiveWordFilterResultResponse filter(String input) {
        SensitiveWordFilterResultResponse result = new SensitiveWordFilterResultResponse();
        if (input == null) {
            return result;
        }
        List<String> sensitiveWordList = result.getSensitiveWordList();

        // Dictionary keys are lower-cased on reload to match this lower-casing.
        List<String> tokenList = segmenter.sentenceProcess(input.toLowerCase());

        // Cursor into the ORIGINAL input, advanced by each token's length so the
        // output preserves the original casing. Assumes token lengths sum to the
        // input length — NOTE(review): confirm sentenceProcess never drops chars.
        int currentLength = 0;
        StringBuilder output = new StringBuilder();
        for (String word : tokenList) {
            int end = Math.min(currentLength + word.length(), input.length());
            if (StringUtils.isBlank(word)) {
                // Blank tokens (whitespace) still occupy positions in the input:
                // copy them through and advance the cursor, otherwise every
                // later token would be read from a shifted offset and the tail
                // of the input would be lost.
                output.append(input, currentLength, end);
                currentLength += word.length();
                continue;
            }
            if (containsWord(word)) {
                output.append(ASTERISK);
                if (CollectionUtils.isEmpty(sensitiveWordList)) {
                    sensitiveWordList = new ArrayList<>();
                }
                sensitiveWordList.add(word);
                currentLength += word.length();
                continue;
            }
            output.append(input, currentLength, end);
            currentLength += word.length();
        }

        result.setInput(input);
        result.setOutput(output.toString());
        result.setSensitiveWordList(sensitiveWordList);
        result.setContainsSensitiveWord(CollectionUtils.isNotEmpty(sensitiveWordList));
        LOGGER.message("敏感词过滤接口结果")
                .context("input", input)
                .context("output", result.getOutput())
                .context("sentenceProcess", tokenList)
                .debug();

        return result;
    }

    /** @return whether the (already lower-cased) token is a known sensitive word */
    private boolean containsWord(String word) {
        return word != null && dict.containsKey(word);
    }


}
